{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "slideshow": {
     "slide_type": "skip"
    }
   },
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "source": [
    "# Pandas - 常用操作\n",
    "* 缺失数据处理\n",
    "* 处理重复数据\n",
    "* 排序\n",
    "* 删除和添加\n",
    "* 分割数据"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 缺失数据\n",
    "* 数据填充\n",
    "* 删除缺失数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>close</th>\n",
       "      <th>date</th>\n",
       "      <th>ticker</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>776.60</td>\n",
       "      <td>2015-12-29</td>\n",
       "      <td>GOOG</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>771.00</td>\n",
       "      <td>2015-12-30</td>\n",
       "      <td>GOOG</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>758.88</td>\n",
       "      <td>2015-12-31</td>\n",
       "      <td>GOOG</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>741.84</td>\n",
       "      <td>2016-01-04</td>\n",
       "      <td>GOOG</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>108.74</td>\n",
       "      <td>2015-12-29</td>\n",
       "      <td>AAPL</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>107.32</td>\n",
       "      <td>2015-12-30</td>\n",
       "      <td>AAPL</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>105.26</td>\n",
       "      <td>2015-12-31</td>\n",
       "      <td>AAPL</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>105.35</td>\n",
       "      <td>2016-01-04</td>\n",
       "      <td>AAPL</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    close        date ticker\n",
       "0  776.60  2015-12-29   GOOG\n",
       "1  771.00  2015-12-30   GOOG\n",
       "2  758.88  2015-12-31   GOOG\n",
       "3  741.84  2016-01-04   GOOG\n",
       "4  108.74  2015-12-29   AAPL\n",
       "5  107.32  2015-12-30   AAPL\n",
       "6  105.26  2015-12-31   AAPL\n",
       "7  105.35  2016-01-04   AAPL"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df=pd.read_csv('./data/stock.csv')\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>close</th>\n",
       "      <th>date</th>\n",
       "      <th>ticker</th>\n",
       "      <th>open</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>776.60</td>\n",
       "      <td>2015-12-29</td>\n",
       "      <td>GOOG</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>771.00</td>\n",
       "      <td>2015-12-30</td>\n",
       "      <td>GOOG</td>\n",
       "      <td>761.00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>758.88</td>\n",
       "      <td>2015-12-31</td>\n",
       "      <td>GOOG</td>\n",
       "      <td>748.88</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>741.84</td>\n",
       "      <td>2016-01-04</td>\n",
       "      <td>GOOG</td>\n",
       "      <td>731.84</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>108.74</td>\n",
       "      <td>2015-12-29</td>\n",
       "      <td>AAPL</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>107.32</td>\n",
       "      <td>2015-12-30</td>\n",
       "      <td>AAPL</td>\n",
       "      <td>97.32</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>105.26</td>\n",
       "      <td>2015-12-31</td>\n",
       "      <td>AAPL</td>\n",
       "      <td>95.26</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>105.35</td>\n",
       "      <td>2016-01-04</td>\n",
       "      <td>AAPL</td>\n",
       "      <td>95.35</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    close        date ticker    open\n",
       "0  776.60  2015-12-29   GOOG     NaN\n",
       "1  771.00  2015-12-30   GOOG  761.00\n",
       "2  758.88  2015-12-31   GOOG  748.88\n",
       "3  741.84  2016-01-04   GOOG  731.84\n",
       "4  108.74  2015-12-29   AAPL     NaN\n",
       "5  107.32  2015-12-30   AAPL   97.32\n",
       "6  105.26  2015-12-31   AAPL   95.26\n",
       "7  105.35  2016-01-04   AAPL   95.35"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['open']=np.where(df['date']>='2015-12-30',df['close']-10,np.nan)\n",
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 数据填充"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>close</th>\n",
       "      <th>date</th>\n",
       "      <th>ticker</th>\n",
       "      <th>open</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>776.60</td>\n",
       "      <td>2015-12-29</td>\n",
       "      <td>GOOG</td>\n",
       "      <td>100.00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>771.00</td>\n",
       "      <td>2015-12-30</td>\n",
       "      <td>GOOG</td>\n",
       "      <td>761.00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>758.88</td>\n",
       "      <td>2015-12-31</td>\n",
       "      <td>GOOG</td>\n",
       "      <td>748.88</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>741.84</td>\n",
       "      <td>2016-01-04</td>\n",
       "      <td>GOOG</td>\n",
       "      <td>731.84</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>108.74</td>\n",
       "      <td>2015-12-29</td>\n",
       "      <td>AAPL</td>\n",
       "      <td>100.00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>107.32</td>\n",
       "      <td>2015-12-30</td>\n",
       "      <td>AAPL</td>\n",
       "      <td>97.32</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>105.26</td>\n",
       "      <td>2015-12-31</td>\n",
       "      <td>AAPL</td>\n",
       "      <td>95.26</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>105.35</td>\n",
       "      <td>2016-01-04</td>\n",
       "      <td>AAPL</td>\n",
       "      <td>95.35</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    close        date ticker    open\n",
       "0  776.60  2015-12-29   GOOG  100.00\n",
       "1  771.00  2015-12-30   GOOG  761.00\n",
       "2  758.88  2015-12-31   GOOG  748.88\n",
       "3  741.84  2016-01-04   GOOG  731.84\n",
       "4  108.74  2015-12-29   AAPL  100.00\n",
       "5  107.32  2015-12-30   AAPL   97.32\n",
       "6  105.26  2015-12-31   AAPL   95.26\n",
       "7  105.35  2016-01-04   AAPL   95.35"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.fillna(100)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>close</th>\n",
       "      <th>date</th>\n",
       "      <th>ticker</th>\n",
       "      <th>open</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>776.60</td>\n",
       "      <td>2015-12-29</td>\n",
       "      <td>GOOG</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>771.00</td>\n",
       "      <td>2015-12-30</td>\n",
       "      <td>GOOG</td>\n",
       "      <td>761.00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>758.88</td>\n",
       "      <td>2015-12-31</td>\n",
       "      <td>GOOG</td>\n",
       "      <td>748.88</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>741.84</td>\n",
       "      <td>2016-01-04</td>\n",
       "      <td>GOOG</td>\n",
       "      <td>731.84</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>108.74</td>\n",
       "      <td>2015-12-29</td>\n",
       "      <td>AAPL</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>107.32</td>\n",
       "      <td>2015-12-30</td>\n",
       "      <td>AAPL</td>\n",
       "      <td>97.32</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>105.26</td>\n",
       "      <td>2015-12-31</td>\n",
       "      <td>AAPL</td>\n",
       "      <td>95.26</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>105.35</td>\n",
       "      <td>2016-01-04</td>\n",
       "      <td>AAPL</td>\n",
       "      <td>95.35</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    close        date ticker    open\n",
       "0  776.60  2015-12-29   GOOG     NaN\n",
       "1  771.00  2015-12-30   GOOG  761.00\n",
       "2  758.88  2015-12-31   GOOG  748.88\n",
       "3  741.84  2016-01-04   GOOG  731.84\n",
       "4  108.74  2015-12-29   AAPL     NaN\n",
       "5  107.32  2015-12-30   AAPL   97.32\n",
       "6  105.26  2015-12-31   AAPL   95.26\n",
       "7  105.35  2016-01-04   AAPL   95.35"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>close</th>\n",
       "      <th>date</th>\n",
       "      <th>ticker</th>\n",
       "      <th>open</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>776.60</td>\n",
       "      <td>2015-12-29</td>\n",
       "      <td>GOOG</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>771.00</td>\n",
       "      <td>2015-12-30</td>\n",
       "      <td>GOOG</td>\n",
       "      <td>761.00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>758.88</td>\n",
       "      <td>2015-12-31</td>\n",
       "      <td>GOOG</td>\n",
       "      <td>748.88</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>741.84</td>\n",
       "      <td>2016-01-04</td>\n",
       "      <td>GOOG</td>\n",
       "      <td>731.84</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>108.74</td>\n",
       "      <td>2015-12-29</td>\n",
       "      <td>AAPL</td>\n",
       "      <td>731.84</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>107.32</td>\n",
       "      <td>2015-12-30</td>\n",
       "      <td>AAPL</td>\n",
       "      <td>97.32</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>105.26</td>\n",
       "      <td>2015-12-31</td>\n",
       "      <td>AAPL</td>\n",
       "      <td>95.26</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>105.35</td>\n",
       "      <td>2016-01-04</td>\n",
       "      <td>AAPL</td>\n",
       "      <td>95.35</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    close        date ticker    open\n",
       "0  776.60  2015-12-29   GOOG     NaN\n",
       "1  771.00  2015-12-30   GOOG  761.00\n",
       "2  758.88  2015-12-31   GOOG  748.88\n",
       "3  741.84  2016-01-04   GOOG  731.84\n",
       "4  108.74  2015-12-29   AAPL  731.84\n",
       "5  107.32  2015-12-30   AAPL   97.32\n",
       "6  105.26  2015-12-31   AAPL   95.26\n",
       "7  105.35  2016-01-04   AAPL   95.35"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.fillna(method='ffill')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>close</th>\n",
       "      <th>date</th>\n",
       "      <th>ticker</th>\n",
       "      <th>open</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>776.60</td>\n",
       "      <td>2015-12-29</td>\n",
       "      <td>GOOG</td>\n",
       "      <td>761.00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>771.00</td>\n",
       "      <td>2015-12-30</td>\n",
       "      <td>GOOG</td>\n",
       "      <td>761.00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>758.88</td>\n",
       "      <td>2015-12-31</td>\n",
       "      <td>GOOG</td>\n",
       "      <td>748.88</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>741.84</td>\n",
       "      <td>2016-01-04</td>\n",
       "      <td>GOOG</td>\n",
       "      <td>731.84</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>108.74</td>\n",
       "      <td>2015-12-29</td>\n",
       "      <td>AAPL</td>\n",
       "      <td>97.32</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>107.32</td>\n",
       "      <td>2015-12-30</td>\n",
       "      <td>AAPL</td>\n",
       "      <td>97.32</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>105.26</td>\n",
       "      <td>2015-12-31</td>\n",
       "      <td>AAPL</td>\n",
       "      <td>95.26</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>105.35</td>\n",
       "      <td>2016-01-04</td>\n",
       "      <td>AAPL</td>\n",
       "      <td>95.35</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    close        date ticker    open\n",
       "0  776.60  2015-12-29   GOOG  761.00\n",
       "1  771.00  2015-12-30   GOOG  761.00\n",
       "2  758.88  2015-12-31   GOOG  748.88\n",
       "3  741.84  2016-01-04   GOOG  731.84\n",
       "4  108.74  2015-12-29   AAPL   97.32\n",
       "5  107.32  2015-12-30   AAPL   97.32\n",
       "6  105.26  2015-12-31   AAPL   95.26\n",
       "7  105.35  2016-01-04   AAPL   95.35"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.fillna(method='bfill',inplace=True)\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>close</th>\n",
       "      <th>date</th>\n",
       "      <th>ticker</th>\n",
       "      <th>open</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>776.60</td>\n",
       "      <td>2015-12-29</td>\n",
       "      <td>GOOG</td>\n",
       "      <td>761.00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>771.00</td>\n",
       "      <td>2015-12-30</td>\n",
       "      <td>GOOG</td>\n",
       "      <td>761.00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>758.88</td>\n",
       "      <td>2015-12-31</td>\n",
       "      <td>GOOG</td>\n",
       "      <td>748.88</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>741.84</td>\n",
       "      <td>2016-01-04</td>\n",
       "      <td>GOOG</td>\n",
       "      <td>731.84</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>108.74</td>\n",
       "      <td>2015-12-29</td>\n",
       "      <td>AAPL</td>\n",
       "      <td>97.32</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>107.32</td>\n",
       "      <td>2015-12-30</td>\n",
       "      <td>AAPL</td>\n",
       "      <td>97.32</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>105.26</td>\n",
       "      <td>2015-12-31</td>\n",
       "      <td>AAPL</td>\n",
       "      <td>95.26</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>105.35</td>\n",
       "      <td>2016-01-04</td>\n",
       "      <td>AAPL</td>\n",
       "      <td>95.35</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    close        date ticker    open\n",
       "0  776.60  2015-12-29   GOOG  761.00\n",
       "1  771.00  2015-12-30   GOOG  761.00\n",
       "2  758.88  2015-12-31   GOOG  748.88\n",
       "3  741.84  2016-01-04   GOOG  731.84\n",
       "4  108.74  2015-12-29   AAPL   97.32\n",
       "5  107.32  2015-12-30   AAPL   97.32\n",
       "6  105.26  2015-12-31   AAPL   95.26\n",
       "7  105.35  2016-01-04   AAPL   95.35"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.bfill()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>close</th>\n",
       "      <th>date</th>\n",
       "      <th>ticker</th>\n",
       "      <th>open</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>776.60</td>\n",
       "      <td>2015-12-29</td>\n",
       "      <td>GOOG</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>771.00</td>\n",
       "      <td>2015-12-30</td>\n",
       "      <td>GOOG</td>\n",
       "      <td>761.00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>758.88</td>\n",
       "      <td>2015-12-31</td>\n",
       "      <td>GOOG</td>\n",
       "      <td>748.88</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>741.84</td>\n",
       "      <td>2016-01-04</td>\n",
       "      <td>GOOG</td>\n",
       "      <td>731.84</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>108.74</td>\n",
       "      <td>2015-12-29</td>\n",
       "      <td>AAPL</td>\n",
       "      <td>731.84</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>107.32</td>\n",
       "      <td>2015-12-30</td>\n",
       "      <td>AAPL</td>\n",
       "      <td>97.32</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>105.26</td>\n",
       "      <td>2015-12-31</td>\n",
       "      <td>AAPL</td>\n",
       "      <td>95.26</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>105.35</td>\n",
       "      <td>2016-01-04</td>\n",
       "      <td>AAPL</td>\n",
       "      <td>95.35</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    close        date ticker    open\n",
       "0  776.60  2015-12-29   GOOG     NaN\n",
       "1  771.00  2015-12-30   GOOG  761.00\n",
       "2  758.88  2015-12-31   GOOG  748.88\n",
       "3  741.84  2016-01-04   GOOG  731.84\n",
       "4  108.74  2015-12-29   AAPL  731.84\n",
       "5  107.32  2015-12-30   AAPL   97.32\n",
       "6  105.26  2015-12-31   AAPL   95.26\n",
       "7  105.35  2016-01-04   AAPL   95.35"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.fillna(method='ffill')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>close</th>\n",
       "      <th>date</th>\n",
       "      <th>ticker</th>\n",
       "      <th>open</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>776.60</td>\n",
       "      <td>2015-12-29</td>\n",
       "      <td>GOOG</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>771.00</td>\n",
       "      <td>2015-12-30</td>\n",
       "      <td>GOOG</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>758.88</td>\n",
       "      <td>2015-12-31</td>\n",
       "      <td>GOOG</td>\n",
       "      <td>748.88</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>741.84</td>\n",
       "      <td>2016-01-04</td>\n",
       "      <td>GOOG</td>\n",
       "      <td>731.84</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>108.74</td>\n",
       "      <td>2015-12-29</td>\n",
       "      <td>AAPL</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>107.32</td>\n",
       "      <td>2015-12-30</td>\n",
       "      <td>AAPL</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>105.26</td>\n",
       "      <td>2015-12-31</td>\n",
       "      <td>AAPL</td>\n",
       "      <td>95.26</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>105.35</td>\n",
       "      <td>2016-01-04</td>\n",
       "      <td>AAPL</td>\n",
       "      <td>95.35</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    close        date ticker    open\n",
       "0  776.60  2015-12-29   GOOG     NaN\n",
       "1  771.00  2015-12-30   GOOG     NaN\n",
       "2  758.88  2015-12-31   GOOG  748.88\n",
       "3  741.84  2016-01-04   GOOG  731.84\n",
       "4  108.74  2015-12-29   AAPL     NaN\n",
       "5  107.32  2015-12-30   AAPL     NaN\n",
       "6  105.26  2015-12-31   AAPL   95.26\n",
       "7  105.35  2016-01-04   AAPL   95.35"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['open']=np.where(df['date']>'2015-12-30',df['close']-10,np.nan)\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>close</th>\n",
       "      <th>date</th>\n",
       "      <th>ticker</th>\n",
       "      <th>open</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>776.6</td>\n",
       "      <td>2015-12-29</td>\n",
       "      <td>GOOG</td>\n",
       "      <td>GOOG</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>771</td>\n",
       "      <td>2015-12-30</td>\n",
       "      <td>GOOG</td>\n",
       "      <td>GOOG</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>758.88</td>\n",
       "      <td>2015-12-31</td>\n",
       "      <td>GOOG</td>\n",
       "      <td>748.88</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>741.84</td>\n",
       "      <td>2016-01-04</td>\n",
       "      <td>GOOG</td>\n",
       "      <td>731.84</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>108.74</td>\n",
       "      <td>2015-12-29</td>\n",
       "      <td>AAPL</td>\n",
       "      <td>AAPL</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>107.32</td>\n",
       "      <td>2015-12-30</td>\n",
       "      <td>AAPL</td>\n",
       "      <td>AAPL</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>105.26</td>\n",
       "      <td>2015-12-31</td>\n",
       "      <td>AAPL</td>\n",
       "      <td>95.26</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>105.35</td>\n",
       "      <td>2016-01-04</td>\n",
       "      <td>AAPL</td>\n",
       "      <td>95.35</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    close        date ticker    open\n",
       "0   776.6  2015-12-29   GOOG    GOOG\n",
       "1     771  2015-12-30   GOOG    GOOG\n",
       "2  758.88  2015-12-31   GOOG  748.88\n",
       "3  741.84  2016-01-04   GOOG  731.84\n",
       "4  108.74  2015-12-29   AAPL    AAPL\n",
       "5  107.32  2015-12-30   AAPL    AAPL\n",
       "6  105.26  2015-12-31   AAPL   95.26\n",
       "7  105.35  2016-01-04   AAPL   95.35"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.fillna(method='ffill',axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>close</th>\n",
       "      <th>date</th>\n",
       "      <th>ticker</th>\n",
       "      <th>open</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>776.60</td>\n",
       "      <td>2015-12-29</td>\n",
       "      <td>GOOG</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>771.00</td>\n",
       "      <td>2015-12-30</td>\n",
       "      <td>GOOG</td>\n",
       "      <td>748.88</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>758.88</td>\n",
       "      <td>2015-12-31</td>\n",
       "      <td>GOOG</td>\n",
       "      <td>748.88</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>741.84</td>\n",
       "      <td>2016-01-04</td>\n",
       "      <td>GOOG</td>\n",
       "      <td>731.84</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>108.74</td>\n",
       "      <td>2015-12-29</td>\n",
       "      <td>AAPL</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>107.32</td>\n",
       "      <td>2015-12-30</td>\n",
       "      <td>AAPL</td>\n",
       "      <td>95.26</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>105.26</td>\n",
       "      <td>2015-12-31</td>\n",
       "      <td>AAPL</td>\n",
       "      <td>95.26</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>105.35</td>\n",
       "      <td>2016-01-04</td>\n",
       "      <td>AAPL</td>\n",
       "      <td>95.35</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    close        date ticker    open\n",
       "0  776.60  2015-12-29   GOOG     NaN\n",
       "1  771.00  2015-12-30   GOOG  748.88\n",
       "2  758.88  2015-12-31   GOOG  748.88\n",
       "3  741.84  2016-01-04   GOOG  731.84\n",
       "4  108.74  2015-12-29   AAPL     NaN\n",
       "5  107.32  2015-12-30   AAPL   95.26\n",
       "6  105.26  2015-12-31   AAPL   95.26\n",
       "7  105.35  2016-01-04   AAPL   95.35"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.fillna(method='bfill',limit=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>close</th>\n",
       "      <th>date</th>\n",
       "      <th>ticker</th>\n",
       "      <th>open</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>776.60</td>\n",
       "      <td>2015-12-29</td>\n",
       "      <td>GOOG</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>771.00</td>\n",
       "      <td>2015-12-30</td>\n",
       "      <td>GOOG</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>758.88</td>\n",
       "      <td>2015-12-31</td>\n",
       "      <td>GOOG</td>\n",
       "      <td>748.88</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>741.84</td>\n",
       "      <td>2016-01-04</td>\n",
       "      <td>GOOG</td>\n",
       "      <td>731.84</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>108.74</td>\n",
       "      <td>2015-12-29</td>\n",
       "      <td>AAPL</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>107.32</td>\n",
       "      <td>2015-12-30</td>\n",
       "      <td>AAPL</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>105.26</td>\n",
       "      <td>2015-12-31</td>\n",
       "      <td>AAPL</td>\n",
       "      <td>95.26</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>105.35</td>\n",
       "      <td>2016-01-04</td>\n",
       "      <td>AAPL</td>\n",
       "      <td>95.35</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    close        date ticker    open\n",
       "0  776.60  2015-12-29   GOOG     NaN\n",
       "1  771.00  2015-12-30   GOOG     NaN\n",
       "2  758.88  2015-12-31   GOOG  748.88\n",
       "3  741.84  2016-01-04   GOOG  731.84\n",
       "4  108.74  2015-12-29   AAPL     NaN\n",
       "5  107.32  2015-12-30   AAPL     NaN\n",
       "6  105.26  2015-12-31   AAPL   95.26\n",
       "7  105.35  2016-01-04   AAPL   95.35"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 删除缺失数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>close</th>\n",
       "      <th>date</th>\n",
       "      <th>ticker</th>\n",
       "      <th>open</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>758.88</td>\n",
       "      <td>2015-12-31</td>\n",
       "      <td>GOOG</td>\n",
       "      <td>748.88</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>741.84</td>\n",
       "      <td>2016-01-04</td>\n",
       "      <td>GOOG</td>\n",
       "      <td>731.84</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>105.26</td>\n",
       "      <td>2015-12-31</td>\n",
       "      <td>AAPL</td>\n",
       "      <td>95.26</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>105.35</td>\n",
       "      <td>2016-01-04</td>\n",
       "      <td>AAPL</td>\n",
       "      <td>95.35</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    close        date ticker    open\n",
       "2  758.88  2015-12-31   GOOG  748.88\n",
       "3  741.84  2016-01-04   GOOG  731.84\n",
       "6  105.26  2015-12-31   AAPL   95.26\n",
       "7  105.35  2016-01-04   AAPL   95.35"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.dropna()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>close</th>\n",
       "      <th>date</th>\n",
       "      <th>ticker</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>776.60</td>\n",
       "      <td>2015-12-29</td>\n",
       "      <td>GOOG</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>771.00</td>\n",
       "      <td>2015-12-30</td>\n",
       "      <td>GOOG</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>758.88</td>\n",
       "      <td>2015-12-31</td>\n",
       "      <td>GOOG</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>741.84</td>\n",
       "      <td>2016-01-04</td>\n",
       "      <td>GOOG</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>108.74</td>\n",
       "      <td>2015-12-29</td>\n",
       "      <td>AAPL</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>107.32</td>\n",
       "      <td>2015-12-30</td>\n",
       "      <td>AAPL</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>105.26</td>\n",
       "      <td>2015-12-31</td>\n",
       "      <td>AAPL</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>105.35</td>\n",
       "      <td>2016-01-04</td>\n",
       "      <td>AAPL</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    close        date ticker\n",
       "0  776.60  2015-12-29   GOOG\n",
       "1  771.00  2015-12-30   GOOG\n",
       "2  758.88  2015-12-31   GOOG\n",
       "3  741.84  2016-01-04   GOOG\n",
       "4  108.74  2015-12-29   AAPL\n",
       "5  107.32  2015-12-30   AAPL\n",
       "6  105.26  2015-12-31   AAPL\n",
       "7  105.35  2016-01-04   AAPL"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.dropna(axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>close</th>\n",
       "      <th>date</th>\n",
       "      <th>ticker</th>\n",
       "      <th>open</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>776.60</td>\n",
       "      <td>2015-12-29</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>771.00</td>\n",
       "      <td>2015-12-30</td>\n",
       "      <td>GOOG</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>758.88</td>\n",
       "      <td>2015-12-31</td>\n",
       "      <td>GOOG</td>\n",
       "      <td>748.88</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>741.84</td>\n",
       "      <td>2016-01-04</td>\n",
       "      <td>GOOG</td>\n",
       "      <td>731.84</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>108.74</td>\n",
       "      <td>2015-12-29</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>107.32</td>\n",
       "      <td>2015-12-30</td>\n",
       "      <td>AAPL</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>105.26</td>\n",
       "      <td>2015-12-31</td>\n",
       "      <td>AAPL</td>\n",
       "      <td>95.26</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>105.35</td>\n",
       "      <td>2016-01-04</td>\n",
       "      <td>AAPL</td>\n",
       "      <td>95.35</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    close        date ticker    open\n",
       "0  776.60  2015-12-29    NaN     NaN\n",
       "1  771.00  2015-12-30   GOOG     NaN\n",
       "2  758.88  2015-12-31   GOOG  748.88\n",
       "3  741.84  2016-01-04   GOOG  731.84\n",
       "4  108.74  2015-12-29    NaN     NaN\n",
       "5  107.32  2015-12-30   AAPL     NaN\n",
       "6  105.26  2015-12-31   AAPL   95.26\n",
       "7  105.35  2016-01-04   AAPL   95.35"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.iloc[[0,4],2]=np.nan\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>close</th>\n",
       "      <th>date</th>\n",
       "      <th>ticker</th>\n",
       "      <th>open</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>758.88</td>\n",
       "      <td>2015-12-31</td>\n",
       "      <td>GOOG</td>\n",
       "      <td>748.88</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>741.84</td>\n",
       "      <td>2016-01-04</td>\n",
       "      <td>GOOG</td>\n",
       "      <td>731.84</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>105.26</td>\n",
       "      <td>2015-12-31</td>\n",
       "      <td>AAPL</td>\n",
       "      <td>95.26</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>105.35</td>\n",
       "      <td>2016-01-04</td>\n",
       "      <td>AAPL</td>\n",
       "      <td>95.35</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    close        date ticker    open\n",
       "2  758.88  2015-12-31   GOOG  748.88\n",
       "3  741.84  2016-01-04   GOOG  731.84\n",
       "6  105.26  2015-12-31   AAPL   95.26\n",
       "7  105.35  2016-01-04   AAPL   95.35"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.dropna()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>close</th>\n",
       "      <th>date</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>776.60</td>\n",
       "      <td>2015-12-29</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>771.00</td>\n",
       "      <td>2015-12-30</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>758.88</td>\n",
       "      <td>2015-12-31</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>741.84</td>\n",
       "      <td>2016-01-04</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>108.74</td>\n",
       "      <td>2015-12-29</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>107.32</td>\n",
       "      <td>2015-12-30</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>105.26</td>\n",
       "      <td>2015-12-31</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>105.35</td>\n",
       "      <td>2016-01-04</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    close        date\n",
       "0  776.60  2015-12-29\n",
       "1  771.00  2015-12-30\n",
       "2  758.88  2015-12-31\n",
       "3  741.84  2016-01-04\n",
       "4  108.74  2015-12-29\n",
       "5  107.32  2015-12-30\n",
       "6  105.26  2015-12-31\n",
       "7  105.35  2016-01-04"
      ]
     },
     "execution_count": 37,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.dropna(axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>close</th>\n",
       "      <th>date</th>\n",
       "      <th>ticker</th>\n",
       "      <th>open</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>771.00</td>\n",
       "      <td>2015-12-30</td>\n",
       "      <td>GOOG</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>758.88</td>\n",
       "      <td>2015-12-31</td>\n",
       "      <td>GOOG</td>\n",
       "      <td>748.88</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>741.84</td>\n",
       "      <td>2016-01-04</td>\n",
       "      <td>GOOG</td>\n",
       "      <td>731.84</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>107.32</td>\n",
       "      <td>2015-12-30</td>\n",
       "      <td>AAPL</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>105.26</td>\n",
       "      <td>2015-12-31</td>\n",
       "      <td>AAPL</td>\n",
       "      <td>95.26</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>105.35</td>\n",
       "      <td>2016-01-04</td>\n",
       "      <td>AAPL</td>\n",
       "      <td>95.35</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    close        date ticker    open\n",
       "1  771.00  2015-12-30   GOOG     NaN\n",
       "2  758.88  2015-12-31   GOOG  748.88\n",
       "3  741.84  2016-01-04   GOOG  731.84\n",
       "5  107.32  2015-12-30   AAPL     NaN\n",
       "6  105.26  2015-12-31   AAPL   95.26\n",
       "7  105.35  2016-01-04   AAPL   95.35"
      ]
     },
     "execution_count": 38,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.dropna(subset=['ticker'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>close</th>\n",
       "      <th>date</th>\n",
       "      <th>ticker</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>776.60</td>\n",
       "      <td>2015-12-29</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>771.00</td>\n",
       "      <td>2015-12-30</td>\n",
       "      <td>GOOG</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>758.88</td>\n",
       "      <td>2015-12-31</td>\n",
       "      <td>GOOG</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>741.84</td>\n",
       "      <td>2016-01-04</td>\n",
       "      <td>GOOG</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>108.74</td>\n",
       "      <td>2015-12-29</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>107.32</td>\n",
       "      <td>2015-12-30</td>\n",
       "      <td>AAPL</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>105.26</td>\n",
       "      <td>2015-12-31</td>\n",
       "      <td>AAPL</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>105.35</td>\n",
       "      <td>2016-01-04</td>\n",
       "      <td>AAPL</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    close        date ticker\n",
       "0  776.60  2015-12-29    NaN\n",
       "1  771.00  2015-12-30   GOOG\n",
       "2  758.88  2015-12-31   GOOG\n",
       "3  741.84  2016-01-04   GOOG\n",
       "4  108.74  2015-12-29    NaN\n",
       "5  107.32  2015-12-30   AAPL\n",
       "6  105.26  2015-12-31   AAPL\n",
       "7  105.35  2016-01-04   AAPL"
      ]
     },
     "execution_count": 42,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.dropna(subset=[5],axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>close</th>\n",
       "      <th>date</th>\n",
       "      <th>ticker</th>\n",
       "      <th>open</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>776.60</td>\n",
       "      <td>2015-12-29</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>771.00</td>\n",
       "      <td>2015-12-30</td>\n",
       "      <td>GOOG</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>758.88</td>\n",
       "      <td>2015-12-31</td>\n",
       "      <td>GOOG</td>\n",
       "      <td>748.88</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>741.84</td>\n",
       "      <td>2016-01-04</td>\n",
       "      <td>GOOG</td>\n",
       "      <td>731.84</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>108.74</td>\n",
       "      <td>2015-12-29</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>107.32</td>\n",
       "      <td>2015-12-30</td>\n",
       "      <td>AAPL</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>105.26</td>\n",
       "      <td>2015-12-31</td>\n",
       "      <td>AAPL</td>\n",
       "      <td>95.26</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>105.35</td>\n",
       "      <td>2016-01-04</td>\n",
       "      <td>AAPL</td>\n",
       "      <td>95.35</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    close        date ticker    open\n",
       "0  776.60  2015-12-29    NaN     NaN\n",
       "1  771.00  2015-12-30   GOOG     NaN\n",
       "2  758.88  2015-12-31   GOOG  748.88\n",
       "3  741.84  2016-01-04   GOOG  731.84\n",
       "4  108.74  2015-12-29    NaN     NaN\n",
       "5  107.32  2015-12-30   AAPL     NaN\n",
       "6  105.26  2015-12-31   AAPL   95.26\n",
       "7  105.35  2016-01-04   AAPL   95.35"
      ]
     },
     "execution_count": 43,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 处理重复数据\n",
    "* 寻找重复数据\n",
    "* 删除重复数据"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 寻找重复数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>ticker</th>\n",
       "      <th>DATE</th>\n",
       "      <th>OPEN</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>AAPL</td>\n",
       "      <td>2015-12-30</td>\n",
       "      <td>400</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>AAPL</td>\n",
       "      <td>2015-12-30</td>\n",
       "      <td>400</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>MSFT</td>\n",
       "      <td>2015-12-30</td>\n",
       "      <td>40</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>YAHOO</td>\n",
       "      <td>2015-12-31</td>\n",
       "      <td>100</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>IBM</td>\n",
       "      <td>2015-12-31</td>\n",
       "      <td>200</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>GOOG</td>\n",
       "      <td>2015-12-29</td>\n",
       "      <td>300</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  ticker        DATE  OPEN\n",
       "0   AAPL  2015-12-30   400\n",
       "1   AAPL  2015-12-30   400\n",
       "2   MSFT  2015-12-30    40\n",
       "3  YAHOO  2015-12-31   100\n",
       "4    IBM  2015-12-31   200\n",
       "5   GOOG  2015-12-29   300"
      ]
     },
     "execution_count": 46,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df=pd.DataFrame({\n",
    "    'ticker':['AAPL','AAPL','MSFT','YAHOO','IBM','GOOG'],\n",
    "    'DATE':['2015-12-30','2015-12-30','2015-12-30','2015-12-31','2015-12-31','2015-12-29'],\n",
    "    'OPEN':[400,400,40,100,200,300]\n",
    "    \n",
    "})\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>ticker</th>\n",
       "      <th>DATE</th>\n",
       "      <th>OPEN</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>AAPL</td>\n",
       "      <td>2015-12-30</td>\n",
       "      <td>400</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  ticker        DATE  OPEN\n",
       "1   AAPL  2015-12-30   400"
      ]
     },
     "execution_count": 48,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df[df.duplicated()]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>ticker</th>\n",
       "      <th>DATE</th>\n",
       "      <th>OPEN</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>AAPL</td>\n",
       "      <td>2015-12-30</td>\n",
       "      <td>400</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  ticker        DATE  OPEN\n",
       "0   AAPL  2015-12-30   400"
      ]
     },
     "execution_count": 50,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df[df.duplicated(keep='last')]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>ticker</th>\n",
       "      <th>DATE</th>\n",
       "      <th>OPEN</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>AAPL</td>\n",
       "      <td>2015-12-30</td>\n",
       "      <td>400</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>AAPL</td>\n",
       "      <td>2015-12-30</td>\n",
       "      <td>400</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  ticker        DATE  OPEN\n",
       "0   AAPL  2015-12-30   400\n",
       "1   AAPL  2015-12-30   400"
      ]
     },
     "execution_count": 52,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df[df.duplicated(keep=False)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>ticker</th>\n",
       "      <th>DATE</th>\n",
       "      <th>OPEN</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>AAPL</td>\n",
       "      <td>2015-12-30</td>\n",
       "      <td>400</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>AAPL</td>\n",
       "      <td>2015-12-30</td>\n",
       "      <td>400</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>YAHOO</td>\n",
       "      <td>2015-12-31</td>\n",
       "      <td>100</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  ticker        DATE  OPEN\n",
       "0   AAPL  2015-12-30   400\n",
       "1   AAPL  2015-12-30   400\n",
       "3  YAHOO  2015-12-31   100"
      ]
     },
     "execution_count": 57,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df[df.duplicated(keep='last',subset=['DATE'])]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>ticker</th>\n",
       "      <th>DATE</th>\n",
       "      <th>OPEN</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>AAPL</td>\n",
       "      <td>2015-12-30</td>\n",
       "      <td>400</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>AAPL</td>\n",
       "      <td>2015-12-30</td>\n",
       "      <td>400</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>MSFT</td>\n",
       "      <td>2015-12-30</td>\n",
       "      <td>40</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>YAHOO</td>\n",
       "      <td>2015-12-31</td>\n",
       "      <td>100</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>IBM</td>\n",
       "      <td>2015-12-31</td>\n",
       "      <td>200</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>GOOG</td>\n",
       "      <td>2015-12-29</td>\n",
       "      <td>300</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  ticker        DATE  OPEN\n",
       "0   AAPL  2015-12-30   400\n",
       "1   AAPL  2015-12-30   400\n",
       "2   MSFT  2015-12-30    40\n",
       "3  YAHOO  2015-12-31   100\n",
       "4    IBM  2015-12-31   200\n",
       "5   GOOG  2015-12-29   300"
      ]
     },
     "execution_count": 55,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 删除重复数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>ticker</th>\n",
       "      <th>DATE</th>\n",
       "      <th>OPEN</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>AAPL</td>\n",
       "      <td>2015-12-30</td>\n",
       "      <td>400</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>MSFT</td>\n",
       "      <td>2015-12-30</td>\n",
       "      <td>40</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>YAHOO</td>\n",
       "      <td>2015-12-31</td>\n",
       "      <td>100</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>IBM</td>\n",
       "      <td>2015-12-31</td>\n",
       "      <td>200</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>GOOG</td>\n",
       "      <td>2015-12-29</td>\n",
       "      <td>300</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  ticker        DATE  OPEN\n",
       "1   AAPL  2015-12-30   400\n",
       "2   MSFT  2015-12-30    40\n",
       "3  YAHOO  2015-12-31   100\n",
       "4    IBM  2015-12-31   200\n",
       "5   GOOG  2015-12-29   300"
      ]
     },
     "execution_count": 63,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.drop_duplicates(keep='last')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 60,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>ticker</th>\n",
       "      <th>DATE</th>\n",
       "      <th>OPEN</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>AAPL</td>\n",
       "      <td>2015-12-30</td>\n",
       "      <td>400</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>AAPL</td>\n",
       "      <td>2015-12-30</td>\n",
       "      <td>400</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>MSFT</td>\n",
       "      <td>2015-12-30</td>\n",
       "      <td>40</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>YAHOO</td>\n",
       "      <td>2015-12-31</td>\n",
       "      <td>100</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>IBM</td>\n",
       "      <td>2015-12-31</td>\n",
       "      <td>200</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>GOOG</td>\n",
       "      <td>2015-12-29</td>\n",
       "      <td>300</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  ticker        DATE  OPEN\n",
       "0   AAPL  2015-12-30   400\n",
       "1   AAPL  2015-12-30   400\n",
       "2   MSFT  2015-12-30    40\n",
       "3  YAHOO  2015-12-31   100\n",
       "4    IBM  2015-12-31   200\n",
       "5   GOOG  2015-12-29   300"
      ]
     },
     "execution_count": 60,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>ticker</th>\n",
       "      <th>DATE</th>\n",
       "      <th>OPEN</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>MSFT</td>\n",
       "      <td>2015-12-30</td>\n",
       "      <td>40</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>IBM</td>\n",
       "      <td>2015-12-31</td>\n",
       "      <td>200</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>GOOG</td>\n",
       "      <td>2015-12-29</td>\n",
       "      <td>300</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  ticker        DATE  OPEN\n",
       "2   MSFT  2015-12-30    40\n",
       "4    IBM  2015-12-31   200\n",
       "5   GOOG  2015-12-29   300"
      ]
     },
     "execution_count": 64,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.drop_duplicates(keep='last',subset=['DATE'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 65,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>ticker</th>\n",
       "      <th>DATE</th>\n",
       "      <th>OPEN</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>AAPL</td>\n",
       "      <td>2015-12-30</td>\n",
       "      <td>400</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>YAHOO</td>\n",
       "      <td>2015-12-31</td>\n",
       "      <td>100</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>GOOG</td>\n",
       "      <td>2015-12-29</td>\n",
       "      <td>300</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  ticker        DATE  OPEN\n",
       "0   AAPL  2015-12-30   400\n",
       "3  YAHOO  2015-12-31   100\n",
       "5   GOOG  2015-12-29   300"
      ]
     },
     "execution_count": 65,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.drop_duplicates(keep='first',subset=['DATE'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 排序"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 69,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>ticker</th>\n",
       "      <th>OPEN</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>DATE</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2015-12-31</th>\n",
       "      <td>YAHOO</td>\n",
       "      <td>100</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2015-12-31</th>\n",
       "      <td>IBM</td>\n",
       "      <td>200</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2015-12-30</th>\n",
       "      <td>AAPL</td>\n",
       "      <td>400</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2015-12-30</th>\n",
       "      <td>AAPL</td>\n",
       "      <td>400</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2015-12-30</th>\n",
       "      <td>MSFT</td>\n",
       "      <td>40</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2015-12-29</th>\n",
       "      <td>GOOG</td>\n",
       "      <td>300</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "           ticker  OPEN\n",
       "DATE                   \n",
       "2015-12-31  YAHOO   100\n",
       "2015-12-31    IBM   200\n",
       "2015-12-30   AAPL   400\n",
       "2015-12-30   AAPL   400\n",
       "2015-12-30   MSFT    40\n",
       "2015-12-29   GOOG   300"
      ]
     },
     "execution_count": 69,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.set_index('DATE').sort_index(ascending=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 71,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>ticker</th>\n",
       "      <th>DATE</th>\n",
       "      <th>OPEN</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>YAHOO</td>\n",
       "      <td>2015-12-31</td>\n",
       "      <td>100</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>IBM</td>\n",
       "      <td>2015-12-31</td>\n",
       "      <td>200</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>AAPL</td>\n",
       "      <td>2015-12-30</td>\n",
       "      <td>400</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>AAPL</td>\n",
       "      <td>2015-12-30</td>\n",
       "      <td>400</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>MSFT</td>\n",
       "      <td>2015-12-30</td>\n",
       "      <td>40</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>GOOG</td>\n",
       "      <td>2015-12-29</td>\n",
       "      <td>300</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  ticker        DATE  OPEN\n",
       "3  YAHOO  2015-12-31   100\n",
       "4    IBM  2015-12-31   200\n",
       "0   AAPL  2015-12-30   400\n",
       "1   AAPL  2015-12-30   400\n",
       "2   MSFT  2015-12-30    40\n",
       "5   GOOG  2015-12-29   300"
      ]
     },
     "execution_count": 71,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.sort_values(['DATE'],ascending=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 78,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>ticker</th>\n",
       "      <th>DATE</th>\n",
       "      <th>OPEN</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>GOOG</td>\n",
       "      <td>2015-12-29</td>\n",
       "      <td>300</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>AAPL</td>\n",
       "      <td>2015-12-30</td>\n",
       "      <td>400</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>AAPL</td>\n",
       "      <td>2015-12-30</td>\n",
       "      <td>400</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>MSFT</td>\n",
       "      <td>2015-12-30</td>\n",
       "      <td>40</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>IBM</td>\n",
       "      <td>2015-12-31</td>\n",
       "      <td>200</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>YAHOO</td>\n",
       "      <td>2015-12-31</td>\n",
       "      <td>100</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  ticker        DATE  OPEN\n",
       "5   GOOG  2015-12-29   300\n",
       "0   AAPL  2015-12-30   400\n",
       "1   AAPL  2015-12-30   400\n",
       "2   MSFT  2015-12-30    40\n",
       "4    IBM  2015-12-31   200\n",
       "3  YAHOO  2015-12-31   100"
      ]
     },
     "execution_count": 78,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.sort_values(['DATE','OPEN'],ascending=[True,False])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 删除和添加"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 删除"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 79,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>ticker</th>\n",
       "      <th>DATE</th>\n",
       "      <th>OPEN</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>AAPL</td>\n",
       "      <td>2015-12-30</td>\n",
       "      <td>400</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>AAPL</td>\n",
       "      <td>2015-12-30</td>\n",
       "      <td>400</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>MSFT</td>\n",
       "      <td>2015-12-30</td>\n",
       "      <td>40</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>YAHOO</td>\n",
       "      <td>2015-12-31</td>\n",
       "      <td>100</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>IBM</td>\n",
       "      <td>2015-12-31</td>\n",
       "      <td>200</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>GOOG</td>\n",
       "      <td>2015-12-29</td>\n",
       "      <td>300</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  ticker        DATE  OPEN\n",
       "0   AAPL  2015-12-30   400\n",
       "1   AAPL  2015-12-30   400\n",
       "2   MSFT  2015-12-30    40\n",
       "3  YAHOO  2015-12-31   100\n",
       "4    IBM  2015-12-31   200\n",
       "5   GOOG  2015-12-29   300"
      ]
     },
     "execution_count": 79,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 81,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>ticker</th>\n",
       "      <th>DATE</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>AAPL</td>\n",
       "      <td>2015-12-30</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>AAPL</td>\n",
       "      <td>2015-12-30</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>MSFT</td>\n",
       "      <td>2015-12-30</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>YAHOO</td>\n",
       "      <td>2015-12-31</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>IBM</td>\n",
       "      <td>2015-12-31</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>GOOG</td>\n",
       "      <td>2015-12-29</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  ticker        DATE\n",
       "0   AAPL  2015-12-30\n",
       "1   AAPL  2015-12-30\n",
       "2   MSFT  2015-12-30\n",
       "3  YAHOO  2015-12-31\n",
       "4    IBM  2015-12-31\n",
       "5   GOOG  2015-12-29"
      ]
     },
     "execution_count": 81,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.drop(['OPEN'],axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 82,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>ticker</th>\n",
       "      <th>DATE</th>\n",
       "      <th>OPEN</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>AAPL</td>\n",
       "      <td>2015-12-30</td>\n",
       "      <td>400</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>MSFT</td>\n",
       "      <td>2015-12-30</td>\n",
       "      <td>40</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>YAHOO</td>\n",
       "      <td>2015-12-31</td>\n",
       "      <td>100</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>GOOG</td>\n",
       "      <td>2015-12-29</td>\n",
       "      <td>300</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  ticker        DATE  OPEN\n",
       "1   AAPL  2015-12-30   400\n",
       "2   MSFT  2015-12-30    40\n",
       "3  YAHOO  2015-12-31   100\n",
       "5   GOOG  2015-12-29   300"
      ]
     },
     "execution_count": 82,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.drop([0,4])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 添加"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 86,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>ticker</th>\n",
       "      <th>DATE</th>\n",
       "      <th>OPEN</th>\n",
       "      <th>close</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>AAPL</td>\n",
       "      <td>2015-12-30</td>\n",
       "      <td>400</td>\n",
       "      <td>500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>AAPL</td>\n",
       "      <td>2015-12-30</td>\n",
       "      <td>400</td>\n",
       "      <td>500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>MSFT</td>\n",
       "      <td>2015-12-30</td>\n",
       "      <td>40</td>\n",
       "      <td>500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>YAHOO</td>\n",
       "      <td>2015-12-31</td>\n",
       "      <td>100</td>\n",
       "      <td>500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>IBM</td>\n",
       "      <td>2015-12-31</td>\n",
       "      <td>200</td>\n",
       "      <td>500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>GOOG</td>\n",
       "      <td>2015-12-29</td>\n",
       "      <td>300</td>\n",
       "      <td>500</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  ticker        DATE  OPEN  close\n",
       "0   AAPL  2015-12-30   400    500\n",
       "1   AAPL  2015-12-30   400    500\n",
       "2   MSFT  2015-12-30    40    500\n",
       "3  YAHOO  2015-12-31   100    500\n",
       "4    IBM  2015-12-31   200    500\n",
       "5   GOOG  2015-12-29   300    500"
      ]
     },
     "execution_count": 86,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['close']=500\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 94,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>ticker</th>\n",
       "      <th>DATE</th>\n",
       "      <th>close</th>\n",
       "      <th>close1</th>\n",
       "      <th>OPEN</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>AAPL</td>\n",
       "      <td>2015-12-30</td>\n",
       "      <td>500</td>\n",
       "      <td>1000</td>\n",
       "      <td>400</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>AAPL</td>\n",
       "      <td>2015-12-30</td>\n",
       "      <td>500</td>\n",
       "      <td>1000</td>\n",
       "      <td>400</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>MSFT</td>\n",
       "      <td>2015-12-30</td>\n",
       "      <td>500</td>\n",
       "      <td>1000</td>\n",
       "      <td>40</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>YAHOO</td>\n",
       "      <td>2015-12-31</td>\n",
       "      <td>500</td>\n",
       "      <td>1000</td>\n",
       "      <td>100</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>IBM</td>\n",
       "      <td>2015-12-31</td>\n",
       "      <td>500</td>\n",
       "      <td>1000</td>\n",
       "      <td>200</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>GOOG</td>\n",
       "      <td>2015-12-29</td>\n",
       "      <td>500</td>\n",
       "      <td>1000</td>\n",
       "      <td>300</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  ticker        DATE  close  close1  OPEN\n",
       "0   AAPL  2015-12-30    500    1000   400\n",
       "1   AAPL  2015-12-30    500    1000   400\n",
       "2   MSFT  2015-12-30    500    1000    40\n",
       "3  YAHOO  2015-12-31    500    1000   100\n",
       "4    IBM  2015-12-31    500    1000   200\n",
       "5   GOOG  2015-12-29    500    1000   300"
      ]
     },
     "execution_count": 94,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df=df[['ticker', 'DATE', 'close','close1', 'OPEN']]\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 91,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>ticker</th>\n",
       "      <th>DATE</th>\n",
       "      <th>close1</th>\n",
       "      <th>OPEN</th>\n",
       "      <th>close</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>AAPL</td>\n",
       "      <td>2015-12-30</td>\n",
       "      <td>1000</td>\n",
       "      <td>400</td>\n",
       "      <td>500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>AAPL</td>\n",
       "      <td>2015-12-30</td>\n",
       "      <td>1000</td>\n",
       "      <td>400</td>\n",
       "      <td>500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>MSFT</td>\n",
       "      <td>2015-12-30</td>\n",
       "      <td>1000</td>\n",
       "      <td>40</td>\n",
       "      <td>500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>YAHOO</td>\n",
       "      <td>2015-12-31</td>\n",
       "      <td>1000</td>\n",
       "      <td>100</td>\n",
       "      <td>500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>IBM</td>\n",
       "      <td>2015-12-31</td>\n",
       "      <td>1000</td>\n",
       "      <td>200</td>\n",
       "      <td>500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>GOOG</td>\n",
       "      <td>2015-12-29</td>\n",
       "      <td>1000</td>\n",
       "      <td>300</td>\n",
       "      <td>500</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  ticker        DATE  close1  OPEN  close\n",
       "0   AAPL  2015-12-30    1000   400    500\n",
       "1   AAPL  2015-12-30    1000   400    500\n",
       "2   MSFT  2015-12-30    1000    40    500\n",
       "3  YAHOO  2015-12-31    1000   100    500\n",
       "4    IBM  2015-12-31    1000   200    500\n",
       "5   GOOG  2015-12-29    1000   300    500"
      ]
     },
     "execution_count": 91,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.drop(['close1'],axis=1,inplace=True)\n",
    "df.insert(2,'close1',1000)\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 95,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\yvonn\\anaconda3\\envs\\base (root)\\lib\\site-packages\\pandas\\core\\indexing.py:671: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  self._setitem_with_indexer(indexer, value)\n"
     ]
    }
   ],
   "source": [
    "df.loc[8]=['GOOG','2015-12-22',1000,500,500]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 97,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>ticker</th>\n",
       "      <th>DATE</th>\n",
       "      <th>close</th>\n",
       "      <th>close1</th>\n",
       "      <th>OPEN</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>AAPL</td>\n",
       "      <td>2015-12-30</td>\n",
       "      <td>500</td>\n",
       "      <td>1000</td>\n",
       "      <td>400</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>AAPL</td>\n",
       "      <td>2015-12-30</td>\n",
       "      <td>500</td>\n",
       "      <td>1000</td>\n",
       "      <td>400</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>MSFT</td>\n",
       "      <td>2015-12-30</td>\n",
       "      <td>500</td>\n",
       "      <td>1000</td>\n",
       "      <td>40</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>YAHOO</td>\n",
       "      <td>2015-12-31</td>\n",
       "      <td>500</td>\n",
       "      <td>1000</td>\n",
       "      <td>100</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>IBM</td>\n",
       "      <td>2015-12-31</td>\n",
       "      <td>500</td>\n",
       "      <td>1000</td>\n",
       "      <td>200</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>GOOG</td>\n",
       "      <td>2015-12-29</td>\n",
       "      <td>500</td>\n",
       "      <td>1000</td>\n",
       "      <td>300</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>GOOG</td>\n",
       "      <td>2015-12-22</td>\n",
       "      <td>1000</td>\n",
       "      <td>500</td>\n",
       "      <td>500</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  ticker        DATE  close  close1  OPEN\n",
       "0   AAPL  2015-12-30    500    1000   400\n",
       "1   AAPL  2015-12-30    500    1000   400\n",
       "2   MSFT  2015-12-30    500    1000    40\n",
       "3  YAHOO  2015-12-31    500    1000   100\n",
       "4    IBM  2015-12-31    500    1000   200\n",
       "5   GOOG  2015-12-29    500    1000   300\n",
       "8   GOOG  2015-12-22   1000     500   500"
      ]
     },
     "execution_count": 97,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 分割数据\n",
    "* 每一类数据数目基本一样\n",
    "* 自定义分割点"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "[0,0,100,200,800]\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 每一类数据数目基本一样"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 101,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0     (150.0, 500.0]\n",
       "1     (150.0, 500.0]\n",
       "2    (39.999, 150.0]\n",
       "3    (39.999, 150.0]\n",
       "4     (150.0, 500.0]\n",
       "5     (150.0, 500.0]\n",
       "8     (150.0, 500.0]\n",
       "Name: OPEN, dtype: category\n",
       "Categories (2, interval[float64]): [(39.999, 150.0] < (150.0, 500.0]]"
      ]
     },
     "execution_count": 101,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.qcut(df['OPEN'],q=[0,.25,1])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 102,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    large\n",
       "1    large\n",
       "2    small\n",
       "3    small\n",
       "4    large\n",
       "5    large\n",
       "8    large\n",
       "Name: OPEN, dtype: category\n",
       "Categories (2, object): [small < large]"
      ]
     },
     "execution_count": 102,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.qcut(df['OPEN'],q=[0,.25,1],labels=['small','large'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 103,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    400\n",
       "1    400\n",
       "2     40\n",
       "3    100\n",
       "4    200\n",
       "5    300\n",
       "8    500\n",
       "Name: OPEN, dtype: int64"
      ]
     },
     "execution_count": 103,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['OPEN']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 自定义分割点"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 104,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    (385.0, 500.0]\n",
       "1    (385.0, 500.0]\n",
       "2    (39.54, 155.0]\n",
       "3    (39.54, 155.0]\n",
       "4    (155.0, 270.0]\n",
       "5    (270.0, 385.0]\n",
       "8    (385.0, 500.0]\n",
       "Name: OPEN, dtype: category\n",
       "Categories (4, interval[float64]): [(39.54, 155.0] < (155.0, 270.0] < (270.0, 385.0] < (385.0, 500.0]]"
      ]
     },
     "execution_count": 104,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.cut(df['OPEN'],bins=4)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 105,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    (300, 400]\n",
       "1    (300, 400]\n",
       "2      (0, 100]\n",
       "3      (0, 100]\n",
       "4    (100, 200]\n",
       "5    (200, 300]\n",
       "8    (400, 500]\n",
       "Name: OPEN, dtype: category\n",
       "Categories (5, interval[int64]): [(0, 100] < (100, 200] < (200, 300] < (300, 400] < (400, 500]]"
      ]
     },
     "execution_count": 105,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.cut(df['OPEN'],bins=[0,100,200,300,400,500])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 106,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    400\n",
       "1    400\n",
       "2     40\n",
       "3    100\n",
       "4    200\n",
       "5    300\n",
       "8    500\n",
       "Name: OPEN, dtype: int64"
      ]
     },
     "execution_count": 106,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['OPEN']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 111,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "IntervalIndex([(0, 100], (100, 200], (200, 300], (300, 400], (400, 500]],\n",
       "              closed='right',\n",
       "              dtype='interval[int64]')"
      ]
     },
     "execution_count": 111,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.interval_range(0,500,freq=100)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 114,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    (300, 400]\n",
       "1    (300, 400]\n",
       "2      (0, 100]\n",
       "3      (0, 100]\n",
       "4    (100, 200]\n",
       "5    (200, 300]\n",
       "8    (400, 500]\n",
       "Name: OPEN, dtype: category\n",
       "Categories (5, interval[int64]): [(0, 100] < (100, 200] < (200, 300] < (300, 400] < (400, 500]]"
      ]
     },
     "execution_count": 114,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.cut(df['OPEN'],bins=pd.interval_range(0,500,freq=100))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 120,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0     l\n",
       "1     l\n",
       "2    xs\n",
       "3    xs\n",
       "4     s\n",
       "5     m\n",
       "8    xl\n",
       "Name: OPEN, dtype: category\n",
       "Categories (5, object): [xs < s < m < l < xl]"
      ]
     },
     "execution_count": 120,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.cut(df['OPEN'],bins=np.arange(0,600,100),labels=['xs','s','m','l','xl'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 123,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0     large\n",
       "1     large\n",
       "2     small\n",
       "3     small\n",
       "4    medium\n",
       "5    medium\n",
       "8     large\n",
       "Name: OPEN, dtype: category\n",
       "Categories (3, object): [small < medium < large]"
      ]
     },
     "execution_count": 123,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.cut(df['OPEN'],bins=[0,100,300,500],labels=['small','medium','large'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 121,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    400\n",
       "1    400\n",
       "2     40\n",
       "3    100\n",
       "4    200\n",
       "5    300\n",
       "8    500\n",
       "Name: OPEN, dtype: int64"
      ]
     },
     "execution_count": 121,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['OPEN']"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true
   },
   "source": [
    "### 课后练习"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 91,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "df = pd.DataFrame({\n",
    "   'ticker': ['AAPL', 'AAPL', 'MSFT', 'IBM', 'YHOO'],\n",
    "    'date': ['2015-12-30', '2015-12-31', '2015-12-30', '2015-12-31', '2015-12-31'],\n",
    "    'open': [426.23, 500.51, 42.3, 101.65, 35.53]})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "复制一个df，命名为df2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "向前填充缺失值"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "删除包含缺失值的row"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "删除包含缺失值的column"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "对每个ticker，只保留最近一条记录"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "删除date"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "删除第一行记录"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.7"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": false,
   "sideBar": true,
   "skip_h1_title": true,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {
    "height": "calc(100% - 180px)",
    "left": "10px",
    "top": "150px",
    "width": "307px"
   },
   "toc_section_display": true,
   "toc_window_display": true
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
